

*This program contains the information needed to create Table 10 of the paper;

*The program references:

1) daily_panel_rfs which  is constructed in  Daily Panel RFS (Intermediate File) code
2) dd_posts18_21q2 which contains due dilligence posts from WSB. This data is collected from the pushshift API;



*The key dependent variables include:
0) ret0 (current day return)
1) ret1 (one-day ahead return)
2) ret2 (two-day ahead return)
3) ret3 (three-day ahead return)
4) ret4 (four-day ahead return)
5) ret5 (five-day ahead return)
6) week2 [6-10 day ahead return]
7) week3 [11-15 day ahead return]
7) week4 [16-20 day ahead return]
8) week5_12 [21-60 day ahead return]


the code below reports reuslts for one-day ahead return - just change dependent variable accordingly;
*note - when ret0 is the dependent variable need to exclude this variable as a control (i.e., drop abn_ret from the set of controls)
*code below reports Panel A (full sample). For panel B (sample that excludes GME and AMC), 
impose the filter "where gme_amc_flag  =0" by removing the "*" in the first part of code; 


*for figure 5 change the dependent variable to  weekX_CUM where X =2 to 12 to obtain weekly cumulative returns;


libname mylib 'g:\Dropbox\Wall Street Bets (Private)\Data';

data info;
set mylib.daily_panel_rfs;;
ret0 = abn_ret;
keep ticker date cum_month gme_amc_flag post_gme
net_dd2  NET_DD2_POST NET_SA2 NET_SA2_POST NON_RESEARCH2 NON_RESEARCH2_POST
DD_and_SA DD_and_SA_POST DD_and_Non_Research DD_and_Non_Research_post net_dd2_pre
 ln_size ln_bm  abn_ret mom5 mom6_26     
BM_MISSING SIZE_MISSING   news_sentiment lag_sent5 lag_sent6_26 SENTIMENT21 ret1 week2-week12 ret0 ret21 total_dd_posts;
WHERE GME_AMC_FLAG =0;
run;

*will add certain return horizons;
proc sort data=INFO;
by ticker descending date;
run;

data INFO;
set INFO;
count  +1;
by ticker;
if first.ticker then count =1;
ret2 = lag(ret1);
ret3 = lag2(ret1);
ret4 = lag3(ret1);
*note ret5 no longer means the five day return, just the one-day return on day 5;
ret5 = lag4(ret1);
if count <=1 then ret2 =.;
if count <=2 then ret3 = .;
if count <=3 then ret4 = .;
if count <=4 then ret4 = .;
week5_12 =  sum( of week5-week12);
drop week5-week12;
run;


proc sort data=info;
by ticker date;
run;

data info;
set info;
count +1;
by ticker;
if non_research_posts = . then non_research_posts =0;

l1_non_research = lag(non_research_posts);
l2_non_research = lag2(non_research_posts);
l3_non_research  = lag3(non_research_posts);
l4_non_research  = lag4(non_research_posts);
l5_non_research  = lag5(non_research_posts);
if count <=1 then l1_non_research = .;
if count <=2 then l2_non_research = .;
if count <=3 then l3_non_research = .;
if count <=4 then l4_non_research = .;
if count <=5 then l5_non_research = .;


past_posts = l1_non_research + l2_non_research + l3_non_research + l4_non_research + l5_non_research ;
if past_posts >1 then high_wsb_posts =1;
else high_wsb_posts =0;


lag_ret = lag(abn_ret);
abs_lag_ret = abs(lag_ret);

lag_abs_ret_rank =  abs_lag_ret;


run;



proc sort data=info;
by date;
run;
proc rank data=info out=info groups =100;
by date;
var lag_abs_ret_rank;
run;



data info;
set info;
if lag_abs_ret_rank >=90 then high_abs_ret =1;
else high_abs_ret = 0;
attention = max(high_abs_ret, high_wsb_posts);
run;


data pp_fund;
set mylib.dd_posts18_21q2;
date = day0;
if pp_num_words = . then delete;

if pp_num_words > funda_num_words then pp =1; else pp =0;

run;



proc sort data=pp_fund;
by ticker  date;
run;

proc univariate noprint data=pp_fund;
by ticker  date;
var purchase;
output out=nonpp_posts sum=buy_dd_posts_nonpp  N=total_dd_posts_nonpp;
where pp ~=1;
run;



data nonpp_posts;
set nonpp_posts;
sell_dd_posts_nonpp =  total_dd_posts_nonpp - buy_dd_posts_nonpp;
NET_DD_POSTS_nonpp = buy_dd_posts_nonpp - sell_dd_posts_nonpp;
 net_dd2_nonpp = NET_DD_POSTS_nonpp;
run;

PROC SORT DATA=nonpp_posts;
BY TICKER  date;
RUN;
PROC SORT DATA=INFO;
BY TICKER  date;
RUN;
DATA INFO;
MERGE INFO nonpp_posts;
BY TICKER  date;
if net_dd2_nonpp >6 then net_dd2_nonpp = 6;
if net_dd2_nonpp =  . then net_dd2_nonpp =0;
net_dd2_pp =   net_dd2 - net_dd2_nonpp;

net_dd2_pp_post = net_dd2_pp * post_gme;


if ret5 = . then delete;
run;



data info2;
set info;

pre_gme = (1- post_gme);


net_dd2_attention = net_dd2 * attention;
net_dd2_attention_post = net_dd2_attention * post_gme;






total_dd_posts2 = total_dd_posts;
if total_dd_posts2 >6 then total_dd_posts2 =6;


if net_dd2_pp >0 then pp_flag =1;
else pp_flag =0;





net_dd2_pp_flag = net_dd2 * pp_flag;
net_dd2_pp_flag_post = net_dd2_pp_flag * post_gme;


attention_pp = max(attention, pp_flag);




net_dd2_attention_pp = net_dd2 * attention_pp;
net_dd2_attention_pp_post = net_dd2 * post_gme * attention_pp;


attention_pp_post_gme = attention_pp * POST_GME;

*will include non-attention posts so that the coefficient on net_dd_attention caputres the total effect rather than incremental effect;

net_dd2_other = net_dd2 * (1- attention_pp);
net_dd2_other_post = net_dd2 * post_gme * (1-attention_pp);


*will include a pre-gme specification to get the third colume (the total effect in post period rather than the incremental effect);


net_dd2_attention_pp_pre = net_dd2 * (1-post_gme) * attention_pp;
net_dd2_other_pre = net_dd2 * (1- post_gme) * (1-attention_pp);



*also add cumulative returns;

week2_cum = sum(of week1-week2);
week3_cum = sum(of week1-week3);
week4_cum = sum(of week1-week4);
week5_cum = sum(of week1-week5);
week6_cum = sum(of week1-week6);
week7_cum = sum(of week1-week7);
week8_cum = sum(of week1-week8);
week9_cum = sum(of week1-week9);
week10_cum = sum(of week1-week10);
week11_cum = sum(of week1-week11);
week12_cum = sum(of week1-week12);



run;
proc sort data=info2;
by date;
run;





proc standard data=info2 out=info2_fe mean=0;
by date;
var ret21  ret1 ret2 ret3 ret4 ret5 week2 week3 week4 week5_12 ret0


net_dd2_other net_dd2_other_pre net_dd2_other_post net_dd2_attention_pp net_dd2_attention_pp_pre net_dd2_attention_pp_post 
NON_RESEARCH2 NON_RESEARCH2_POST NET_SA2 NET_SA2_POST ln_size ln_bm  abn_ret mom5 mom6_26  
week2_cum week3_cum week4_cum week5_cum week6_cum week7_cum week8_cum week9_cum week10_cum week11_cum week12_cum;
  where gme_amc_flag =0;
quit;


*this code generates row 1 and columns 1-4 of Table 10;
*to generate columns 5-6 replace "net_dd2_other" and "net_dd2_attention_pp" with "net_dd2_other_pre" and "net_dd2_attention_pp_pre";

*To obtain all other rows, change the horizon of the dependent variable. We report results for the following dependent variables:

0) ret0 (current day return) [*note - when ret0 is the dependent variable need to exclude this variable as a control (i.e., drop abn_ret from the set of controls)]
1) ret1 (one-day ahead return)
2) ret2 (two-day ahead return)
3) ret3 (three-day ahead return)
4) ret4 (four-day ahead return)
5) ret5 (five-day ahead return)
6) week2 [6-10 day ahead return]
7) week3 [11-15 day ahead return]
7) week4 [16-20 day ahead return]
8) week5_12 [21-60 day ahead return];


*you can also use this code to recreate Figure 5
*repace ret21 with weekX_CUM where X = 2 to 12 to obtain  cumulative returns from weeks2 trhough week12;
*one week return can be computed be replacing week2_cum with ret5;


%let yourdata=INFO2_FE;

%let firmid=ticker ;
%let time=cum_month;

%let y=ret1 ;
%let x=  net_dd2_other net_dd2_other_post net_dd2_attention_pp net_dd2_attention_pp_post NON_RESEARCH2 NON_RESEARCH2_POST NET_SA2 
NET_SA2_POST ln_size ln_bm  abn_ret mom5 mom6_26       news_sentiment lag_sent5 lag_sent6_26   BM_MISSING SIZE_MISSING;
run;




proc surveyreg data=&yourdata;

    cluster &firmid;
    model &y = &x /covb;
    ods output covb=firm;
run;
quit;


*cluster by second dimension (e.g., year);
proc surveyreg data=&yourdata;

    cluster &time;
    model &y = &x /covb;
    ods output covb=year;
run;
quit;

*cluster by intersection of the two dimensions (e.g, firm-year);
proc surveyreg data=&yourdata;
    cluster &firmid &time;
    model &y = &x /covb;
    ods output covb=both;
    ods output parameterestimates=parm;
run;
quit;

*keeps original parameter estimates;
data parm; set parm;
    keep parameter estimate;
run;

*returns a dataset with a scalar for the dimensions of the var/cov matrix. This is needed to extract the square roots of the diagonals later on;
data parm1; set parm;
    n=_n_;
    m=1;
    keep m n;
run;

data parm1; set parm1; by m;
    if last.m;
    keep n;
run;

*uses matrix algebra interface to construct Var-cov matrix and extract the standard errors;
proc iml;
    use both;
    read all var _num_ into Z;
    print Z;
    use firm;
    read all var _num_ into X;
    print X;
    use year;
    read all var _num_ into Y;
    print Y;
    use parm1;
    read all var _num_ into n;
    print n;

    B=X+Y-Z;
    C=I(n);
    D=J(n,1);
    E=C#B;
    F=E*D;
    G=F##.5;

    print B;
    print G;
    create b from G [colname='stderr']; ;
    append from G;
quit;

*creates a dataset called 'results' that contains the parameter estimates, the SE's, and the t-stats;
data results; merge parm B;
    tstat=estimate/stderr;
run;

proc print data=results;
run;




data spec3;
set results;
estimate3 = estimate  ;
obs = _n_;
keep parameter estimate3 obs;
run;
data spec3b;
set results;
estimate3 = tstat ;
obs = _n_ + .5;
keep parameter estimate3 obs;
run;
data spec3;
set spec3 spec3b;
run;
proc sort data=spec3;
by obs;
run;
